In [1]:
library(ggplot2)
library(gdata)


gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

Attaching package: ‘gdata’

The following object is masked from ‘package:stats’:

    nobs

The following object is masked from ‘package:utils’:

    object.size

The following object is masked from ‘package:base’:

    startsWith


In [2]:
D <- read.table("../Data/All_data.txt")
D <- D[order(D$Region,D$Disease),]

In [3]:
names(D)


  1. 'Region'
  2. 'Disease'
  3. 'burden_daly'
  4. 'burden_yll'
  5. 'burden_yld'
  6. 'burden_death'
  7. 'Prop_loc_burden_daly'
  8. 'Prop_loc_burden_yll'
  9. 'Prop_loc_burden_yld'
  10. 'Prop_loc_burden_death'
  11. 'Prop_glob_burden_daly'
  12. 'Prop_glob_burden_yll'
  13. 'Prop_glob_burden_yld'
  14. 'Prop_glob_burden_death'
  15. 'Prop_NHI_burden_daly'
  16. 'Prop_NHI_burden_yll'
  17. 'Prop_NHI_burden_yld'
  18. 'Prop_NHI_burden_death'
  19. 'Nb_RCTs_low'
  20. 'Nb_RCTs_med'
  21. 'Nb_RCTs_up'
  22. 'Nb_Patients_low'
  23. 'Nb_Patients_med'
  24. 'Nb_Patients_up'
  25. 'Prop_loc_RCTs_low'
  26. 'Prop_loc_RCTs_med'
  27. 'Prop_loc_RCTs_up'
  28. 'Prop_loc_Patients_low'
  29. 'Prop_loc_Patients_med'
  30. 'Prop_loc_Patients_up'
  31. 'Prop_glob_RCTs_low'
  32. 'Prop_glob_RCTs_med'
  33. 'Prop_glob_RCTs_up'
  34. 'Prop_glob_Patients_low'
  35. 'Prop_glob_Patients_med'
  36. 'Prop_glob_Patients_up'
  37. 'Prop_NHI_RCTs_low'
  38. 'Prop_NHI_RCTs_med'
  39. 'Prop_NHI_RCTs_up'
  40. 'Prop_NHI_Patients_low'
  41. 'Prop_NHI_Patients_med'
  42. 'Prop_NHI_Patients_up'

In [4]:
metr_burden <- "daly"
metr_res <- "RCTs"

In [5]:
#We compare RCTs to DALYs
dpl <- D[D$Region!="Non-HI",
          c(which(names(D)%in%c("Region","Disease")),
            intersect(grep(metr_burden,names(D)),grep("^burden",names(D))),
            intersect(grep(metr_res,names(D)),grep("^Nb",names(D)))),]

In [6]:
head(dpl)


RegionDiseaseburden_dalyNb_RCTs_lowNb_RCTs_medNb_RCTs_up
1All All 2220063510.8007678661.775 82179 85358.2
2All Cardiovascular and circulatory diseases287404109.09231 9526.975 10676 11943
3All Chronic respiratory diseases112485355.22285 4302.975 4773 5283
4All Cirrhosis of the liver30462721.1164 534.975 1061 1574
5All Congenital anomalies43254504.439 71 523 986
6All Diabetes, urinary diseases and male infertility75821480.094146 10613.975 11700.5 12854.05

In [7]:
#Order diseases: increasing burden
dis <- dpl$Disease[dpl$Region=="All"][order(dpl$burden[dpl$Region=="All"])]

In [8]:
dis


  1. Leprosy
  2. Sudden infant death syndrome
  3. Gynecological diseases
  4. Hepatitis
  5. Sexually transmitted diseases excluding HIV
  6. Oral disorders
  7. Hemoglobinopathies and hemolytic anemias
  8. Maternal disorders
  9. Neglected tropical diseases excluding malaria
  10. Cirrhosis of the liver
  11. Sense organ diseases
  12. Digestive diseases (except cirrhosis)
  13. Skin and subcutaneous diseases
  14. Congenital anomalies
  15. Tuberculosis
  16. Neurological disorders
  17. Diabetes, urinary diseases and male infertility
  18. Nutritional deficiencies
  19. HIV/AIDS
  20. Malaria
  21. Chronic respiratory diseases
  22. Musculoskeletal disorders
  23. Mental and behavioral disorders
  24. Neoplasms
  25. Neonatal disorders
  26. Cardiovascular and circulatory diseases
  27. Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases
  28. All

In [9]:
dis <- dis[dis!="All"]

In [10]:
#Number of RCTs per region
regs <- dpl$Region[dpl$Disease=="All"][order(dpl[dpl$Disease=="All",grep("med",names(dpl))],
                                                decreasing=TRUE)]

In [11]:
regs


  1. All
  2. High-income
  3. Southeast Asia, East Asia and Oceania
  4. North Africa and Middle East
  5. Central Europe, Eastern Europe, and Central Asia
  6. South Asia
  7. Latin America and Caribbean
  8. Sub-Saharian Africa

In [12]:
regs <- regs[regs!="All"]

In [13]:
#Region labels
reg_labs <- c("High-income countries",
              "Southeast Asia,\nEast Asia and Oceania",
              "North Africa and\nMiddle East", 
              "Eastern Europe\nand Central Asia",
              "South Asia", 
              "Latin America\nand Caribbean", 
              "Sub-Saharian\nAfrica")

In [14]:
dpl <- dpl[dpl$Region!="All" & dpl$Disease!="All",]

In [15]:
#Normalizing regions: max RCts = max GBD
Norm_fact <- max(dpl[,grep("up",names(dpl))],na.rm=TRUE)/max(dpl$burden)
dpl$gpl <- (dpl$burden/max(dpl$burden))*max(dpl[,grep("up",names(dpl))],na.rm=TRUE)

In [16]:
#Bar size = wdt*2
wdt <- 0.45
#Distance between regions (end to end)
d_reg <- 400
#Distance between center of region and start of bars (for disease labels)
esp_dis_nb <- 200
#Inner circle
IC <- 8

In [17]:
#Rectangles for a given region and disease
#Rg = central position of region
#d = name of the disease
#rg = name of the region
displt <- 
function(d,Rg,rg){
res_pl <- data.frame(  xmin = which(d==dis)-wdt,
                       xmax = which(d==dis)+wdt,
                       ymin = Rg+esp_dis_nb,
                       ymax = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("med",names(dpl))],
                       metr="Research",
                       reg=rg,
                       ycent=Rg,
                       dis_nb=which(d==dis),
                       disease=d)
burd_pl <- data.frame( xmin = which(d==dis)-wdt,
                       xmax = which(d==dis)+wdt,
                       ymin = Rg-esp_dis_nb,
                       ymax = Rg-esp_dis_nb-dpl$gpl[dpl$Dis==d & dpl$Region==rg],
                       metr="Burden",
                       reg=rg,
                       ycent=Rg,
                       dis_nb=which(d==dis),
                       disease=d)
rbind(res_pl,burd_pl)
}

In [18]:
displt_err <- function(d,Rg,rg){
    data.frame(x = which(d==dis),
               ymin = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("low",names(dpl))],
               ymax = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("up",names(dpl))],
               metr="Research",
               reg=rg,
               dis_nb=which(d==dis),
               disease=d)
}

In [19]:
#Rectangles pour toutes les maladies, une région donnée
regplt <- function(Rg,rg) do.call('rbind',lapply(dis,function(x){displt(x,Rg,rg)}))
regplt_err <- function(Rg,rg) do.call('rbind',lapply(dis,function(x){displt_err(x,Rg,rg)}))

In [20]:
#Emplacement des régions
RG <- 0
for(i in 2:length(regs)){
RG <- c(RG,
        RG[i-1]-(2*esp_dis_nb+
                 max(dpl$gpl[dpl$Region==regs[i-1]])+
                 d_reg+max(dpl[dpl$Region==regs[i],grep("up",names(dpl))],na.rm=TRUE)))
}

In [21]:
#DataFrame Plot
DPLOT <- do.call('rbind',lapply(1:length(regs),function(i){regplt(RG[i],regs[i])}))
#Error_bars dataframe
DPLOT_err <- do.call('rbind',lapply(1:length(regs),function(i){regplt_err(RG[i],regs[i])}))

In [22]:
#Inner circle
DPLOT$xmin <- DPLOT$xmin + IC
DPLOT$xmax <- DPLOT$xmax + IC
DPLOT$xcent <- DPLOT$dis_nb + IC
DPLOT_err$x <- DPLOT_err$x + IC
DPLOT_err$xcent <- DPLOT_err$dis_nb + IC

In [23]:
totalLength <- max(DPLOT_err$ymax,na.rm=TRUE)-min(DPLOT$ymax,na.rm=TRUE)+d_reg

In [24]:
#Polar coordinates
alphaStart <- 2*pi*((max(DPLOT_err$ymax[DPLOT_err$reg==regs[1]]+d_reg/2,na.rm=TRUE))/
                    totalLength)

In [25]:
#REGION LABELS
  readableAngle<-function(x){
    angle<-x*(360/totalLength)
  }
    familyLabelsDF<-data.frame(xmin=RG,label=reg_labs)
    familyLabelsDF$angle <- readableAngle(familyLabelsDF$xmin)

In [26]:
#Disease labels: size
DPLOT$size_dis_lab = 2.3*(40+DPLOT$dis_nb)/(40+max(DPLOT$dis_nb))

Research and burden tick marks


In [27]:
#Research
rcttks <- c(0,100,500,1000,2000,3000,5000,7500,10000)
maj_rcts <- function(nb){
    x <- nb
    k <- 0
    while(x>=100){x <- x%/%10
                  k <- k+1}
    (x+1)*10^k
}

In [28]:
#Faire que les ticks aillent jusqu'au max des RCTs arrondi au sup
RCTtcks <- do.call('rbind',lapply(regs,function(x){
data.frame(
    breaks = unique(DPLOT$ymin[DPLOT$metr=="Research" & DPLOT$reg==x]) + 
             c(rcttks[2:findInterval(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE),rcttks)],
               maj_rcts(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE))),
    labels=c(rcttks[2:findInterval(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE),rcttks)],
             maj_rcts(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE))),
    region=x)
}))
RCTtcks$col <- "1RCT"

In [29]:
#Pour GBD
gbdtks <- c(0,1e7,2e7,3e7,5e7,7.5e7,1e8,1.5e8,2e8)/1e6
maj_gbd <- function(x) ifelse(trunc(x)==x,x,trunc(x) + 1)

In [30]:
GBDtcks <- do.call('rbind',lapply(regs,function(x){
data.frame(
    breaks = unique(DPLOT$ymin[DPLOT$metr=="Burden" & DPLOT$reg==x]) -
        c(gbdtks[2:findInterval(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6,gbdtks)],
          maj_gbd(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6))*
        1e6*Norm_fact,
    labels=c(gbdtks[2:findInterval(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6,gbdtks)],
             maj_gbd(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6)),
    region=x)}))
GBDtcks$col <- "2GBD"

In [31]:
#High-income countries, burden from 44 to 45
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",1] <- 
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",1] - (45-44)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",2] <- 45
#Southeast Asia, research from 2000 and 2200 to 2100
RCTtcks[RCTtcks$labels==2200 & RCTtcks$region=="Southeast Asia, East Asia and Oceania",c(1,2)] <- 
RCTtcks[RCTtcks$labels==2200 & RCTtcks$region=="Southeast Asia, East Asia and Oceania",c(1,2)] - (2200-2100)
RCTtcks <- RCTtcks[!(RCTtcks$labels==2000 & RCTtcks$region=="Southeast Asia, East Asia and Oceania"),]
#Southeast Asia, burden from 82 to 80
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",1] <- 
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",1] - (80-82)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",2] <- 80
#Eastern Europe, burden from 54 to 55
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",1] <- 
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",1] - (55-54)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",2] <- 55
#South Asia, burden from 131 to 130
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",1] <- 
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",1] - (130-131)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",2] <- 130
#Latin America, burden from 16 to 15
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",1] <- 
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",1] - (15-16)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",2] <- 15
#Sub-Saharia Afria, burden from 126 to 125
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",1] <- 
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",1] - (125-126)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",2] <- 125

In [32]:
RCTtcks$labels <- as.character(RCTtcks$label)
GBDtcks$labels <- as.character(GBDtcks$label)
tcks <- rbind(RCTtcks,GBDtcks)
tcks$col <- as.factor(tcks$col)

GGPLOT Object


In [33]:
p <- ggplot(DPLOT) +
        geom_rect(aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax,fill=metr)) +
        geom_errorbar(aes(x=x,ymax=ymax,ymin=ymin),size=0.1,width=0.5,data=DPLOT_err) + 
        #Disease numbers
        geom_text(aes( x=xcent,
                        y=ycent,
                        label=dis_nb,
                        hjust=0.5),
                   size=DPLOT$size_dis_lab,
                   col="#42442E") + 
        theme_minimal() + 
        theme(  axis.title.y=element_blank(),
                axis.text.y=element_blank(),
                axis.ticks.y=element_blank(),
                axis.title.x=element_blank(),
                axis.ticks.x=element_blank()
                ) + 
        theme(legend.position = "none") + 
        scale_x_continuous(breaks = NULL,limits = c(0,max(DPLOT$xmax,na.rm=TRUE)+3)) +
        #Region labels
        geom_text(
                  aes(  x=length(dis)+IC+3,
                        y=xmin,
                        label=label,
                        angle=angle,
                        hjust=0.5,vjust=0),
                  data=familyLabelsDF,
                  size=4.3) + 
        #Colors burden and research
        scale_fill_manual(values = c("Burden"="orange","Research"="blue"))

In [34]:
#Tickmarks
p <- p+ scale_y_continuous(minor_breaks = tcks$breaks, breaks=tcks$breaks,
                      labels=rep("",nrow(tcks)),
                    limits=c(min(DPLOT$ymax,na.rm=TRUE)-d_reg/2,max(DPLOT_err$ymax,na.rm=TRUE)+d_reg/2)) + 
        theme(panel.grid.minor=element_line(color="#D3D3D3",size=0.1)) + 
        geom_text(
            aes(x=length(dis)+IC+1.5,
            y=breaks,
            label=labels,
            hjust=0.5),
            data=tcks,
            size=2,
            col=as.numeric(tcks$col))

In [35]:
ggsave(filename = "../Figures/polar_props_RCT_DALYs.pdf",
      plot = p + coord_polar(theta="y",start=alphaStart,direction=-1),
      width=12,height=12)


Warning message:
“Removed 21 rows containing missing values (geom_rect).”Warning message:
“Removed 21 rows containing missing values (geom_errorbar).”Warning message:
“Removed 1 rows containing missing values (geom_text).”

In [ ]: